In [39]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [40]:
# Load the CSV export that every later cell works on.
df = pd.read_csv(filepath_or_buffer="data/1.32.csv")
In [41]:
# Show the pandas version string for this kernel.
pd.__version__
Out[41]:
In [42]:
# Show the numpy version string for this kernel.
np.__version__
Out[42]:
In [43]:
# Parse both timestamp columns into proper datetime64 values.
# `astype(pd.Timestamp)` and `astype(np.datetime64)` are not valid dtype
# casts (current pandas rejects them); `pd.to_datetime` is the supported
# way to parse a column of timestamps.
df["userTime"] = pd.to_datetime(df["userTime"])
df["serverTime"] = pd.to_datetime(df["serverTime"])
# Store the game version as text (presumably so it is handled as a label
# rather than as a number -- confirm against the data).
df["gameVersion"] = df["gameVersion"].astype(str)
In [44]:
# Preview the first rows of df after the dtype conversions.
df.head()
Out[44]:
In [45]:
# Tally how many rows were recorded under each game version.
df.loc[:, "gameVersion"].value_counts()
Out[45]:
Number of unique game sessions (distinct `playerId` values)
In [46]:
# Number of distinct playerId values; later cells use it as the population size.
playerscount = df.loc[:, "playerId"].nunique()
playerscount
Out[46]:
In [47]:
# Rows logged per playerId, in value_counts() order (largest first),
# plotted against the player's rank in that ordering.
events_per_player = df["playerId"].value_counts()
eventstable = pd.DataFrame({"events": events_per_player.values})
events_ax = eventstable.plot()
events_ax.set_ylabel("events count")
events_ax.set_xlabel("player index")
events_ax.legend('')
Out[47]:
In [48]:
# Count the rows of type "death" that carry a non-null id.
deathscount = df.loc[df["type"] == "death", "id"].count()
deathscount
Out[48]:
In [49]:
# Death events averaged over all distinct players; float() keeps this a
# true division even when both operands are integers.
deathscount / float(playerscount)
Out[49]:
In [50]:
# Death events tallied per playerId; only players with at least one
# "death" row appear, so the length is the number of players who died.
deadplayers = df.loc[df["type"] == "death", "playerId"].value_counts()
deadplayerscount = deadplayers.count()
deadplayerscount
Out[50]:
In [51]:
# Deaths per player (players with at least one death), re-indexed 0..n-1
# in value_counts() order, then plotted and summarised.
deathtable = pd.DataFrame({"deaths": deadplayers.values})
deaths_ax = deathtable.plot()
deaths_ax.set_xlabel("player index")
deaths_ax.set_ylabel("deaths")
deaths_ax.legend('')
deathtable.describe()
Out[51]:
In [88]:
# Extend the per-player death table with one zero row for every player who
# never died, so the plot and the summary statistics cover all players.
# deathtable occupies index 0 .. deadplayerscount-1, so the padding has to
# run up to playerscount-1 inclusive, i.e. range(deadplayerscount, playerscount).
# The previous upper bound of playerscount-1 silently dropped one player.
# dtype is pinned so an empty padding (everyone died) stays integer.
neverdeadseries = pd.Series(0, index=list(range(deadplayerscount, playerscount)), dtype="int64")
neverdead = pd.DataFrame({"deaths":neverdeadseries})
alldeathtable = pd.concat([deathtable, neverdead])
ax = alldeathtable.plot()
plt.xlabel("player index")
plt.ylabel("deaths")
plt.legend('')
alldeathtable.describe()
Out[88]:
Number of devices equipped by players
In [53]:
# Count the rows of type "equip" that carry a non-null id.
equipscount = df.loc[df["type"] == "equip", "id"].count()
equipscount
Out[53]:
Mean number of devices equipped by players
In [54]:
# Equip events averaged over all distinct players; float() keeps this a
# true division even when both operands are integers.
# NOTE: `mean` is reassigned by the craft and complete cells further down.
mean = equipscount / float(playerscount)
mean
Out[54]:
Number of players who equipped devices
In [55]:
# Equip events tallied per playerId; only players with at least one
# "equip" row appear, so the length is the number of players who equipped.
equipedplayers = df.loc[df["type"] == "equip", "playerId"].value_counts()
equipedplayerscount = equipedplayers.count()
equipedplayerscount
Out[55]:
In [56]:
# Equips per player for players with at least one "equip" event,
# re-indexed 0..n-1 in value_counts() order.
equiptable = pd.DataFrame({"equips" : equipedplayers.values})
equiptable.plot()
plt.xlabel("player index")
plt.ylabel("equipped devices")
plt.legend('')
# A bare expression in the middle of a cell is evaluated and thrown away,
# so the summary is printed explicitly (single-argument print() behaves the
# same under Python 2 and 3).
print(equiptable.describe())

# Pad with one zero row per player who never equipped anything.
# equiptable occupies index 0 .. equipedplayerscount-1, so the padding must
# reach playerscount-1 inclusive; the previous bound of playerscount-1 in
# range() left one player out of the plot and the statistics.
# dtype is pinned so an empty padding stays integer.
neverequipedseries = pd.Series(0, index=list(range(equipedplayerscount, playerscount)), dtype="int64")
neverequiped = pd.DataFrame({"equips":neverequipedseries})
allequiptable = pd.concat([equiptable, neverequiped])
allequiptable.plot()
plt.xlabel("player index")
plt.ylabel("equipped devices")
plt.legend('')
allequiptable.describe()
Out[56]:
In [57]:
# Count the rows of type "craft" that carry a non-null id.
craftscount = df.loc[df["type"] == "craft", "id"].count()
craftscount
Out[57]:
In [58]:
# Craft events averaged over all distinct players; float() keeps this a
# true division even when both operands are integers.
# NOTE: `mean` is also assigned by the equip and complete cells.
mean = craftscount / float(playerscount)
mean
Out[58]:
In [59]:
# Craft events tallied per playerId; only players with at least one
# "craft" row appear, so the length is the number of players who crafted.
craftedplayers = df.loc[df["type"] == "craft", "playerId"].value_counts()
craftedplayerscount = craftedplayers.count()
craftedplayerscount
Out[59]:
In [60]:
# Crafts per player for players with at least one "craft" event,
# re-indexed 0..n-1 in value_counts() order.
crafttable = pd.DataFrame({"crafts" : craftedplayers.values})
crafttable.plot()
plt.xlabel("player index")
plt.ylabel("crafted devices")
plt.legend('')
# A bare expression in the middle of a cell is evaluated and thrown away,
# so the summary is printed explicitly (single-argument print() behaves the
# same under Python 2 and 3).
print(crafttable.describe())

# Pad with one zero row per player who never crafted anything.
# crafttable occupies index 0 .. craftedplayerscount-1, so the padding must
# reach playerscount-1 inclusive; the previous bound of playerscount-1 in
# range() left one player out of the plot and the statistics.
# dtype is pinned so an empty padding stays integer.
nevercraftedseries = pd.Series(0, index=list(range(craftedplayerscount, playerscount)), dtype="int64")
nevercrafted = pd.DataFrame({"crafts":nevercraftedseries})
allcrafttable = pd.concat([crafttable, nevercrafted])
allcrafttable.plot()
plt.xlabel("player index")
plt.ylabel("crafted devices")
plt.legend('')
allcrafttable.describe()
Out[60]:
Number of finished games
In [61]:
# Count the rows of type "complete" that carry a non-null id.
completescount = df.loc[df["type"] == "complete", "id"].count()
completescount
Out[61]:
Mean number of games finished by players
In [62]:
# Complete events averaged over all distinct players; float() keeps this a
# true division even when both operands are integers.
# NOTE: `mean` is also assigned by the equip and craft cells.
mean = completescount / float(playerscount)
mean
Out[62]:
Number of players who finished a game
In [63]:
# Complete events tallied per playerId; only players with at least one
# "complete" row appear, so the length is the number of finishers.
completedplayers = df.loc[df["type"] == "complete", "playerId"].value_counts()
completedplayerscount = completedplayers.count()
completedplayerscount
Out[63]:
In [64]:
# Completions per player for players with at least one "complete" event,
# re-indexed 0..n-1 in value_counts() order.
completetable = pd.DataFrame({"completes" : completedplayers.values})
completetable.plot()
plt.xlabel("player index")
plt.ylabel("completed games")
plt.legend('')
# A bare expression in the middle of a cell is evaluated and thrown away,
# so the summary is printed explicitly (single-argument print() behaves the
# same under Python 2 and 3).
print(completetable.describe())

# Pad with one zero row per player who never completed a game.
# completetable occupies index 0 .. completedplayerscount-1, so the padding
# must reach playerscount-1 inclusive; the previous bound of playerscount-1
# in range() left one player out of the plot and the statistics.
# dtype is pinned so an empty padding stays integer.
nevercompletedseries = pd.Series(0, index=list(range(completedplayerscount, playerscount)), dtype="int64")
nevercompleted = pd.DataFrame({"completes":nevercompletedseries})
allcompletetable = pd.concat([completetable, nevercompleted])
allcompletetable.plot()
plt.xlabel("player index")
plt.ylabel("completed games")
plt.legend('')
allcompletetable.describe()
Out[64]:
Checkpoints reached by players
In [65]:
# Number of "reach" rows per section value, ordered by section label.
reach_sections = df.loc[df["type"] == "reach", "section"]
checkpoints = reach_sections.value_counts().sort_index()
checkpoints
Out[65]:
In [66]:
# Plot the per-section reach counts against their position in the
# label-sorted series.
checkpointstable = pd.DataFrame({"checkpoints": checkpoints.values})
checkpoints_ax = checkpointstable.plot()
checkpoints_ax.set_ylabel("number of players")
checkpoints_ax.set_xlabel("checkpoints reached")
checkpoints_ax.legend('')
Out[66]:
In [67]:
# For every playerId with a "reach" row, keep the largest section value
# (max() on the section column; string labels compare lexicographically).
checkpointsbyplayer = df.loc[df["type"] == "reach", ['section', 'playerId']]
grouped = checkpointsbyplayer.groupby("playerId")
maxCheckpointWithIDs = grouped.max()
maxCheckpointWithIDs.head()
Out[67]:
In [68]:
# Reach events tallied per playerId; only players with at least one
# "reach" row appear, so the length is the number of players who reached
# a checkpoint.
reachedplayers = df.loc[df["type"] == "reach", "playerId"].value_counts()
reachedplayerscount = reachedplayers.count()
reachedplayerscount
Out[68]:
In [69]:
# Count how many players have each section as their best one, then add a
# 'Checkpoint00' entry with a count of 0 and order by label.
best_sections = maxCheckpointWithIDs.values.flatten()
maxCheckpointTable = pd.DataFrame({"maxCheckpoint": best_sections})
maxCheckpointCounts = maxCheckpointTable["maxCheckpoint"].value_counts()
maxCheckpointCounts.loc['Checkpoint00'] = 0
maxCheckpointCounts = maxCheckpointCounts.sort_index()
maxCheckpointCounts
Out[69]:
In [70]:
# Line plot of players per best checkpoint (positions follow the
# label-sorted counts), followed by summary statistics.
maxCheckpointCountsTable = pd.DataFrame({"maxCheckpoint": maxCheckpointCounts.values})
best_ax = maxCheckpointCountsTable.plot()
best_ax.legend('')
best_ax.set_ylabel("number of players")
best_ax.set_xlabel("best checkpoint reached")
maxCheckpointCountsTable.describe()
Out[70]:
In [71]:
# Preview the first rows of the best-checkpoint count table.
maxCheckpointCountsTable.head()
Out[71]:
In [72]:
# Players with no "reach" row at all are filed under 'Checkpoint00', so the
# counts below cover every player.
maxCheckpointCounts['Checkpoint00'] = playerscount - reachedplayerscount
maxCheckpointCountsTable = pd.DataFrame({"maxCheckpoint" : maxCheckpointCounts.values})

# Absolute counts as a bar chart.
maxCheckpointCountsTable.plot(kind='bar')
plt.ylabel("number of players")
plt.xlabel("best checkpoint reached")
plt.legend('')

# Function-call form: the bare `print x` statement is a syntax error on
# Python 3, while print(x) with a single argument works on both 2 and 3.
print(maxCheckpointCountsTable.head())

# Absolute counts as a line chart. This plot shows raw counts, so the axis
# is labelled as a count (it was previously labelled "players (%)").
maxCheckpointCountsTable.plot()
plt.ylabel("number of players")
plt.xlabel("best checkpoint reached")
plt.legend('')

# Same data as a share of all players; 100.0 makes the float division explicit.
(maxCheckpointCountsTable * 100.0 / playerscount).plot(kind='bar')
plt.ylabel("players (%)")
plt.xlabel("best checkpoint reached")
plt.legend('')
maxCheckpointCountsTable.describe()
Out[72]:
In [73]:
# Number of rows for each value of the "type" column.
df["type"].value_counts()
Out[73]:
In [74]:
# Summary statistics for df.
df.describe()
Out[74]:
In [75]:
# Show the first row of df as a raw array.
df.values[0]
Out[75]: